package org.hagen.mmia.yars.engine.corpora;

import java.util.ArrayList;
import java.util.regex.Pattern;

/*
 * very simple abstract representation
 */
public class Abstract{

	// actual data
	public String	ID 		= "";
	public Integer	index 	= 0;
	public String	title 	= "";
	public String	content = "";

	// temporary data
	public String 			 tmp_ID = null;
	public ArrayList<String> tmp_title = null;
	public ArrayList<String> tmp_content = null;
	
	public void print() {
		System.out.println( "number : " + ID + " index : " + index );
		System.out.println( "title  : " + title );
		System.out.println( "content: " + content );
		System.out.println( "*******************" );
	}
	
	private boolean verbose = false;
	
	/*
	 * transformation from temporary into actual data
	 */
	public void normalize( boolean stop, boolean stem ) {

		// normalize ID
		String stringId = (tmp_ID.split(" ")[1]);
		ID = stringId;

		// normalize title
		for (int i = 0; i < tmp_title.size(); i++) {
			String line = tmp_title.get(i);
			
			// replace special characters
			line = line.replaceAll("[\\,.\\s/\\-()\"='0-9%]+", " ");
		
			// stopping
			if( stop ) {
				line = Reader.doStopping( line );
			}
			// stemming
			if( stem ) {
				line = Reader.doStemming( line );
			}
			
			title += " " + line.trim();
		}

		// normalize content
		for (int i = 0; i < tmp_content.size(); i++) {
			String line = tmp_content.get(i);

			// stopping
			if( stop ) {
				line = Reader.doStopping( line );
			}
			// stemming
			if( stem ) {
				line = Reader.doStemming( line );
			}
			
			// replace special characters
			line = line.replaceAll("[\\,.\\s/\\-()\"='0-9%]+", " ");
			
			content += " " + line.trim();
		}

		// destruction
		tmp_ID = null;
		tmp_title = null;
		tmp_content = null;
		
		if( verbose ) {
			print();
		}
	}
}

